library(grid)
library(ggplot2)
# Work from the folder that holds the four journal CSV files read below.
# NOTE(review): absolute, machine-specific path; the script only runs where
# this directory exists.
setwd("/Users/Allan/Dropbox/!!Papers/1Reputation/ARPS Reputation Review/Quant Literature Search/Rep_Files")
set.seed(1234)


# Read one CSV per journal. The files are read without a header row, so the
# columns arrive as V1, V2, ... and are named further down. `FALSE` is spelled
# out because `F` is an ordinary variable that can be reassigned.
d1 <- read.csv("APSR.csv", header = FALSE)
d2 <- read.csv("FA.csv", header = FALSE)
d3 <- read.csv("IS.csv", header = FALSE)
d4 <- read.csv("JCR.csv", header = FALSE)

# Stack the four journals into a single data frame.
d <- rbind(d1, d2, d3, d4)

#Producing dataset
# Name the raw columns that are used. Column 9 previously received the name
# "Journal" as well as column 1, so the name-based selection below silently
# relied on first-match lookup (which returns column 1). Column 9 now gets a
# distinct name, which keeps the selected data identical but unambiguous.
# NOTE(review): confirm that column 1 (not column 9) is the intended journal field.
names(d)[c(1, 2, 4, 5, 6, 9, 12, 14, 15)] <- c("Journal", "Terms", "Type", "Author",
                                               "Year", "Journal.col9", "Volume", "Issue", "Pages")
d[1:3, ]
names(d)[7] <- "Title"

#Reordering columns
# Keep only the named columns that the rest of the script works with.
d <- d[, c("Terms", "Year", "Journal", "Volume", "Issue", "Pages", "Author", "Title")]

hist(d$Year[d$Year > 1998])
#Taking out years after 2010, since those years seem to have fewer counts than earlier years.
# which() also drops rows whose Year is NA; a bare logical index would turn
# each of them into an all-NA row and make the counts below NA.
d <- d[which(d$Year < 2011), ]

# Number of rows returned for the plain "War" search term.
sum(d$Terms == "War")

#Creating unique identifier
# Each row of `uni` is the (Year, Volume, Issue, Pages) key of one observation.
# NOTE(review): Journal is not part of the key -- confirm that two journals
# can never share the same year/volume/issue/pages.
uni <- cbind(d[["Year"]], d[["Volume"]], d[["Issue"]], d[["Pages"]])

# duplicated() only flags the later copies in the scan direction, so scanning
# once forwards and once backwards flags every member of a repeated key.
du1 <- duplicated(uni, fromLast = FALSE)
du2 <- duplicated(uni, fromLast = TRUE)

#Indicator for if the observation is unique
# 1 when the key occurs exactly once in the data, 0 when it is repeated.
un <- as.numeric(!(du1 | du2))

# Prepend the indicator as the first column of the data.
d <- cbind(un, d)



# Count and share of keyword (non-"War") rows whose key is unique.
sum(d$un & d$Terms != "War")
sum(d$un & d$Terms != "War") / sum(d$Terms != "War")
#0% of not war terms are unique. This is as it should be.
# Same count and share for the plain "War" rows.
sum(d$un & d$Terms == "War")
sum(d$un & d$Terms == "War") / sum(d$Terms == "War")
#36% of War terms are unique. As it should be.

#Indicator for if there is an error in the data.
# Start from an all-zero vector with one entry per row. It is built directly
# rather than by coercing d$Volume, which could emit coercion warnings and
# hid the intent.
err <- numeric(nrow(d))
# A keyword row whose key is unique is flagged as an error.
err[d$Terms != "War" & d$un == 1] <- 1
sum(err)

#New Variable for Term
#This variable indicates which comparison is being made.
#Essentially observations of just "War" are being replicated, once for each of the keywords.
#This facilitates producing the figure using ggplot.
# Term starts as a factor copy of Terms and becomes the first column of d.
Term <- as.factor(d$Terms)
d <- cbind(Term, d)

# The existing "War" rows serve as the comparison group for "Reputation" ...
d$Term[d$Terms == "War"] <- "Reputation"

# ... and two further copies of those rows do the same for "Prestige" and "Honor".
d.war1 <- d.war2 <- d[d$Terms == "War", ]
d.war1$Term <- "Prestige"
d.war2$Term <- "Honor"

# Append both copies to the data.
d <- rbind(d, d.war1, d.war2)

#Out is the outcome variable, 1 for articles that reference the particular keyword, 0 if not.
# Rows that match neither rule for any keyword keep NA.
d$out <- rep(NA, nrow(d))
for (kw in c("Reputation", "Prestige", "Honor")) {
  # Keyword rows compared against their own keyword count as 1 ...
  d$out[d$Terms == kw & d$Term == kw] <- 1
  # ... and the "War" rows assigned to that keyword's comparison count as 0.
  d$out[d$Terms == "War" & d$Term == kw] <- 0
}

#Reordering Term factor
## General recipe for reordering factor levels
## (if x is not a factor, use levels(factor(x))):
#x = factor(x,levels(x)[c(4,5,1:3)])
# Move the levels currently at positions 8, 1 and 5 to the front, followed by
# the remaining positions of 1:15 in their existing order.
# NOTE(review): the positions are hard-coded and assume exactly 15 levels;
# presumably 8/1/5 are "Reputation"/"Honor"/"Prestige" -- confirm.
d$Term <- factor(d$Term, levels = levels(d$Term)[c(8, 1, 5, setdiff(1:15, c(8, 1, 5)))])

#Directory for saving image
setwd("/Users/Allan/Dropbox/!!Papers/1Reputation/ARPS Reputation Review/Renshon-Reputation/Drafts/Images")

#Restriction on dataset for Figure 1
# TRUE for rows whose search term is one of the three keywords or "War".
r <- d$Terms %in% c("Reputation", "Honor", "Prestige", "War")

#Creating labels
# One text label per journal and keyword: 4 journals x 3 keywords = 12 rows.
journal.names <- factor(rep(c("APSR", "FA", "IS", "JCR"), times = 3))
# Label coordinates, one line per keyword, journals in the order above.
# (A uniform alternative for x would be rep(c(1915, 1925, 1975, 1960), 3).)
xvalues <- c(1915, 1925, 2007, 1960,    # Reputation
             1915, 1925, 1974, 1960,    # Honor
             1915, 1925, 1975, 1960)    # Prestige
yvalues <- c(0.1,  0.03, 0.23,  0.055,  # Reputation
             0.13, 0.16, 0.02,  0.06,   # Honor
             0.06, 0.18, 0.035, 0.11)   # Prestige
# Keyword for each label; this overwrites the earlier global `Term`.
Term <- factor(rep(c("Reputation", "Honor", "Prestige"), each = 4))
text.data <- data.frame(journal.names, xvalues, yvalues, Term)

#Figure 1, one facet row per Term
# Loess-smoothed `out` against Year, one line and ribbon per Journal, with the
# journal names drawn as text labels at the positions held in text.data.
# Changes from the earlier version of this call:
#   * linewidth replaces size for the smoothed line (size is deprecated for
#     line geoms in current ggplot2),
#   * panel.spacing replaces the deprecated panel.margin theme element,
#   * the unknown aesthetic `Term = Term` was dropped from geom_text(); the
#     facet variable is taken from the Term column of text.data regardless.
# NOTE(review): strip.text.x styles column strips, but facet_grid(Term ~ .)
# only draws row strips, so that setting has no visible effect here. It is
# left unchanged; switch to strip.text.y if larger strip labels are wanted.
p1 <- ggplot(d[r, ], aes(x = Year, y = out, colour = Journal)) +
  facet_grid(Term ~ .) +
  geom_smooth(aes(fill = Journal), alpha = 0.06, linewidth = 1,
              method = "loess", span = 0.55) +
  theme_bw() +
  ggtitle("# Articles Referencing 'KEYWORD AND War' / # 'War' ") +
  coord_cartesian(ylim = c(0, .25), xlim = c(1910, 2011)) +
  ylab("Proportion of Articles") +
  geom_text(aes(x = xvalues, y = yvalues, label = journal.names,
                colour = journal.names),
            data = text.data, size = 5) +
  theme(panel.spacing = unit(0.3, "cm"),
        strip.text.x = element_text(size = 12),
        strip.background = element_rect(fill = "white"),
        legend.position = "none")
#p1
# Written into the current working directory.
ggsave(filename = "fig_overallv.pdf", plot = p1, height = 9, width = 8, units = "in")
# Alternative legend placement, kept for reference:
#+ theme(legend.position=c(.1,0.85), legend.background = element_rect(fill = "#ffffffaa", colour = NA)) +


##Inspecting Prestige in recent years
# View() opens a data viewer, which is only useful in an interactive session.
# Guarding it keeps a batch run from being interrupted by the viewer.
if (interactive()) {
  View(d[d$Year > 2008 & d$Terms == "Prestige", ])
}
# Year distribution after 2004 for the "Prestige" keyword rows ...
hist(d$Year[d$Year > 2004 & d$Terms == "Prestige"])
# ... and for the "War" rows assigned to the Prestige comparison.
hist(d$Year[d$Year > 2004 & d$Terms == "War" & d$Term == "Prestige"])


